/**
 * Created 2017-05-18
 */
package cn.edu.bjtu.workbench.tokenization;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;

/**
 * Tokenizer that keeps only nouns (ansj nature {@code "n"}) and terms from the
 * user-defined dictionary (nature {@code "userDefine"}), as segmented by the
 * ansj {@link ToAnalysis} analyzer. Single-character terms are dropped as well.
 *
 * <p>Usage follows the dl4j {@link Tokenizer} contract: call
 * {@link #hasMoreTokens()} before each {@link #nextToken()}.
 *
 * @author Alex
 */
public class OnlyNounAnsjTokenizer extends BaseFilterTokenizer implements Tokenizer{

	ToAnalysis ta = new ToAnalysis();
	// Term stream over the parsed input; null when stream parsing failed.
	Iterator<Term> iter = null;

	/** Tokenizes an in-memory string. */
    public OnlyNounAnsjTokenizer(String s){
    	Result result = ta.parseStr(s);
    	iter = result.iterator();
    }

	/**
	 * Tokenizes a character stream.
	 * NOTE(review): the reader uses the platform default charset — confirm
	 * callers always supply input in that encoding, or pass a charset here.
	 */
	public OnlyNounAnsjTokenizer(InputStream in) {
		ta = new ToAnalysis(new InputStreamReader(in));
		try {
			iter = ta.parse().iterator();
		} catch (IOException e) {
			// Best effort: a failed parse yields an empty tokenizer instead of
			// propagating. TODO(review): report via a logger rather than
			// printStackTrace.
			e.printStackTrace();
			iter = null;
		}
	}

	/**
	 * True when the term must be filtered out: null, a single character, or a
	 * nature other than plain {@code "n"} or {@code "userDefine"}. Note this
	 * also skips compound noun natures such as {@code "nr"}/{@code "ns"}.
	 */
    private boolean skip(Term t){
    	return t == null || t.getName().length() == 1
    			|| !(t.getNatureStr().equals("n") || t.getNatureStr().equals("userDefine"));
    }

    // Look-ahead token produced by hasNext0() and consumed by nextToken0().
    String temp ;

    /** Advances past skipped terms; stores the next accepted token in {@code temp}. */
    private boolean hasNext0(){
    	temp = null;
    	while(temp == null && iter.hasNext()){
    		Term t = iter.next();
    		if(!skip(t)){
    			temp = t.getName();
    		}
    	}
    	return temp != null;
    }

	private String nextToken0() {
		return temp;
	}

	@Override
	public boolean hasMoreTokens() {
		if(iter == null) return false; // stream constructor may have failed
		return hasNext0();
	}

	/**
	 * NOTE(review): returns a constant 1 instead of the remaining token count;
	 * counting would require consuming the underlying iterator. Kept as-is for
	 * compatibility with existing callers.
	 */
	@Override
	public int countTokens() {
		return 1;
	}

	/** Returns the token found by the preceding {@link #hasMoreTokens()} call. */
	@Override
	public String nextToken() {
		return nextToken0();
	}

	/** Drains the tokenizer into a list, dropping empty tokens. */
	@Override
	public List<String> getTokens() {
		List<String> tokens = new ArrayList<>();
		while (hasMoreTokens()) {
			String token = nextToken();
			if (!token.isEmpty()) {
				tokens.add(token);
			}
		}
		return tokens;
	}

}
